Library necessary packages

library(NMF)
library(rsvd)
library(Rtsne)
library(ggplot2)
library(cowplot)
library(sva)
library(igraph)
library(cccd)
library(KernSmooth)
library(beeswarm)
library(stringr)
library(reshape2)
library(formatR)
library(destiny)
source("Fxns.R")

Load data

SalmHelm_UMIs <- load_data("./Extend_data/GSE92332_SalmHelm_UMIcounts.txt.gz")
## [1] "File exists!"
## 2017-12-26 03:09:40 INFO: Data dimensions: 15215x9842
SalmHelm_UMIs <- SalmHelm_UMIs[which(unlist(apply(SalmHelm_UMIs, 1, sum)) > 
    0), ]
SalmHelm_tpm <- data.frame(log2(1 + tpm(SalmHelm_UMIs)))
## 2017-12-26 03:09:51 INFO: Running TPM normalisation

Select variables

v = get.variable.genes(SalmHelm_UMIs, min.cv2 = 100)
## 2017-12-26 03:10:34 INFO: Fitting only the 10363 genes with mean expression > 0.0128022759601707

## 2017-12-26 03:10:36 INFO: Found 1936 variable genes (p<0.05)
var.genes = as.character(rownames(v)[v$p.adj < 0.05])  # select genes

Extract message from sample

SalmHelm.all.cells <- colnames(SalmHelm_UMIs)
SalmHelm.all.genes <- rownames(SalmHelm_UMIs)

SalmHelm.condition <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x, 
    "_")[[1]][3])))  # cell by conditon
SalmHelm.cell.groups <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x, 
    "_")[[1]][4])))  # cell groups
SalmHelm.batches <- unlist(lapply(SalmHelm.all.cells, function(x) return(str_split(x, 
    "_")[[1]][1])))  #  batches

Batch effect

# check batch effect
table(SalmHelm.batches)
## SalmHelm.batches
##   B1  B10   B2   B3   B4   B5   B6   B7   B8   B9 
##  840  950  200 1258  942 1490  631 1169 1542  820
batch_mean_tpm = group.means(counts = SalmHelm_tpm, groups = SalmHelm.batches)
x = batch_mean_tpm[, 1]
y = batch_mean_tpm[, 2]
expr.cor = round(cor(x, y), 2)
smoothScatter(x, y, nrpoints = Inf, pch = 16, cex = 0.25, main = sprintf("Before batch correction, correlation between \ntwo illustrative batches is %s", 
    expr.cor), xlab = "All genes Batch 2, mean log2(TPM+1)", ylab = "All genes Batch  1, mean log2(TPM+1)")

# remove the batch effect
SalmHelm_tpm_norm = batch.normalise.comBat(counts = as.matrix(SalmHelm_tpm), 
    batch.groups = SalmHelm.batches)
## Standardizing Data across genes
batch_mean_tpm_norm = group.means(counts = SalmHelm_tpm_norm, groups = SalmHelm.batches)
x = batch_mean_tpm_norm[, 1]
y = batch_mean_tpm_norm[, 2]
expr.cor = round(cor(x, y), 2)
smoothScatter(x, y, nrpoints = Inf, pch = 16, cex = 0.25, main = sprintf("After batch correction, correlation between \ntwo illustrative batches is %s", 
    expr.cor), xlab = "All genes Batch 2, mean log2(TPM+1)", ylab = "All genes Batch  1, mean log2(TPM+1)")

Dimensionality reduction:PCA,tSNE

SalmHelm.tsne.rot <- PCA_TSNE.scores(data.tpm = SalmHelm_tpm_norm, data.umis = SalmHelm_UMIs, 
    var_genes = var.genes, data_name = "./Extend_data/SalmHelm", sig.pcs = TRUE, 
    is.var.genes = TRUE)
## ./Extend_data/SalmHelm_pca_scores.txt exists
## ./Extend_data/SalmHelm_tsne_scores.txt exists
SalmHelm.tsne.rot <- data.frame(SalmHelm.tsne.rot)
colnames(SalmHelm.tsne.rot) <- c("tSNE_1", "tSNE_2")

ggplot(data = SalmHelm.tsne.rot, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = SalmHelm.cell.groups)) + 
    scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) + 
    ggtitle("SalmHelm")

ggplot(data = SalmHelm.tsne.rot, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = SalmHelm.condition)) + 
    scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) + 
    ggtitle("SalmHelm")

Figure a

genes <- c("Cd74", "Gpx2", "Hsph1")
All.Facet.tsne <- data.frame()
for (gene in genes) {
    All.Facet.tsne <- rbind(All.Facet.tsne, Facet_wrap_fun(gene = gene, tpm.data = SalmHelm_tpm_norm, 
        tsne.data = SalmHelm.tsne.rot, condition = unique(SalmHelm.condition), 
        all.condition = SalmHelm.condition))
}
## There ara 4 conditions
## Whether creat data accurate 1 
## There ara 4 conditions
## Whether creat data accurate 1 
## There ara 4 conditions
## Whether creat data accurate 1
ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[1], ], aes(x = tSNE_1, 
    y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[1]) + facet_wrap(~Condition, 
    nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue", 
    face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue", 
    mid = "green", high = "red", name = "Log2\nTPM+1")

ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[2], ], aes(x = tSNE_1, 
    y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[2]) + facet_wrap(~Condition, 
    nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue", 
    face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue", 
    mid = "green", high = "red", name = "Log2\nTPM+1")

ggplot(data = All.Facet.tsne[All.Facet.tsne$Gene %in% genes[3], ], aes(x = tSNE_1, 
    y = tSNE_2, colour = Gene.Mp)) + geom_point() + ggtitle(genes[3]) + facet_wrap(~Condition, 
    nrow = 5, ncol = 2) + theme(legend.title = element_text(size = 10, color = "blue", 
    face = "bold"), legend.position = "right") + scale_color_gradient2(low = "lightblue", 
    mid = "green", high = "red", name = "Log2\nTPM+1")

Figure b

SalmHelm.Tuft.tpm <- SalmHelm_tpm_norm[, SalmHelm.cell.groups %in% "Tuft"]
SalmHelm.Tuft.tsne <- SalmHelm.tsne.rot[SalmHelm.cell.groups %in% "Tuft", ]
SalmHelm.Tuft.pca <- read.table("./Extend_data/SalmHelm_pca_scores.txt")[SalmHelm.cell.groups %in% 
    "Tuft", ]

### Find the k to make 3 cluster k-nearest method
### Find_K<-function(K,pca.data,n=3){ dm<-as.matrix(dist(pca.data)) for(k in
### K){ knn<-build_knn_graph(dm,k=k) clustering<-cluster_graph(knn)$partition
### if(length(unique(clustering))==n){ cat(sprintf('Find the K:%d\n',k))
### return(k) break } } } K<-seq(2,200,by = 2) k<-Find_K(K = K,pca.data =
### SalmHelm.Tuft.pca[,1:15])

dm <- as.matrix(dist(SalmHelm.Tuft.pca[, 1:15]))
# build nearest neighbor graph
knn = build_knn_graph(dm, k = 74)
Tuft.clustering = cluster_graph(knn)$partition

Tuft_1_marker_genes <- as.character(read.table("./Extend_data/Tuft_1_marker_genes.txt")$V1)

Tuft_2_marker_genes <- as.character(read.table("./Extend_data/Tuft_2_marker_genes.txt")$V1)

Tuft.clustering <- paste("Tuft-", Tuft.clustering, sep = "")
Tuft.clustering <- ifelse(Tuft.clustering == "Tuft-1", "Tuft-1", ifelse(Tuft.clustering == 
    "Tuft-2", "Tuft-2", "Progenitor"))

score_1 <- unlist(apply(SalmHelm.Tuft.tpm[Tuft_1_marker_genes, ], 2, mean, na.rm = TRUE))
score_2 <- unlist(apply(SalmHelm.Tuft.tpm[Tuft_2_marker_genes, ], 2, mean, na.rm = TRUE))
Dklk1 <- as.numeric(SalmHelm.Tuft.tpm["Dclk1", ])

Violin.data1 <- cbind(data.frame(Value = score_1), data.frame(Group = Tuft.clustering))
Violin.data2 <- cbind(data.frame(Value = score_2), data.frame(Group = Tuft.clustering))
Dklk1.data <- cbind(data.frame(Value = Dklk1), data.frame(Group = Tuft.clustering))

Figure b.1

ggplot(data = Violin.data1, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) + 
    geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2), 
    color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Tuft-1 score") + 
    xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")

# stat_summary(fun.data=mean_sdl,geom='pointrange', color='red')

Figure b.2

ggplot(data = Violin.data2, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) + 
    geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2), 
    color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Tuft-2 score") + 
    xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")

Figure b.3

ggplot(data = Dklk1.data, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) + 
    geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2), 
    color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Dckl1") + 
    xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")

Figure c

## Figure c.1

ggplot(SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(color = score_1)) + 
    theme(legend.title = element_text(size = 8, color = "blue", face = "bold"), 
        legend.position = "right") + ggtitle("Tuft-1 Score") + scale_color_gradient2(low = "lightblue", 
    mid = "green", high = "red", name = "Log2\nTPM+1")

## Figure c.2

ggplot(SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(color = score_2)) + 
    theme(legend.title = element_text(size = 8, color = "blue", face = "bold"), 
        legend.position = "right") + ggtitle("Tuft-2 Score") + scale_color_gradient2(low = "lightblue", 
    mid = "green", high = "red", name = "Log2\nTPM+1")

## Figure c.3
ggplot(data = SalmHelm.Tuft.tsne, aes(x = tSNE_1, y = tSNE_2)) + geom_point(aes(colour = Tuft.clustering)) + 
    scale_color_manual(values = brewer16) + scale_fill_discrete() + theme(legend.title = element_blank()) + 
    ggtitle("Tuft")

Figure d

Induction of antiparasitic genes by goblet cells after helminth infection

SalmHelm.Goblet.tpm <- SalmHelm_tpm_norm[, SalmHelm.cell.groups %in% "Goblet"]
SalmHelm.Goblet.condiion <- SalmHelm.condition[SalmHelm.cell.groups %in% "Goblet"]
SalmHelm.Goblet.condition.tpm <- SalmHelm.Goblet.tpm[, SalmHelm.Goblet.condiion %in% 
    c("Control", "Hpoly.Day3", "Hpoly.Day10")]
SalmHelm.Goblet.condiion3 <- SalmHelm.Goblet.condiion[SalmHelm.Goblet.condiion %in% 
    c("Control", "Hpoly.Day3", "Hpoly.Day10")]

Retnlb <- as.numeric(SalmHelm.Goblet.condition.tpm["Retnlb", ])
Wars <- as.numeric(SalmHelm.Goblet.condition.tpm["Wars", ])
Pnliprp2 <- as.numeric(SalmHelm.Goblet.condition.tpm["Pnliprp2", ])

Violin.Retnlb <- cbind(data.frame(Value = Retnlb), data.frame(Group = SalmHelm.Goblet.condiion3))
Violin.Wars <- cbind(data.frame(Value = Wars), data.frame(Group = SalmHelm.Goblet.condiion3))
Violin.Pnliprp2 <- cbind(data.frame(Value = Pnliprp2), data.frame(Group = SalmHelm.Goblet.condiion3))
## Figure d.1
ggplot(data = Violin.Retnlb, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) + 
    geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2), 
    color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Retnlb") + 
    xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")

## Figure d.2
ggplot(data = Violin.Wars, aes(x = Group, y = Value, colour = Group)) + geom_violin(aes(fill = Group)) + 
    geom_boxplot(width = 0.1, fill = "white") + geom_jitter(shape = 16, position = position_jitter(0.2), 
    color = "black") + theme(legend.title = element_blank()) + ggtitle(label = "Wars") + 
    xlab(NULL) + ylab("Expression Level\nLog2(TPM+1)")

## Figure d.3
ggplot(data = Violin.Pnliprp2, aes(x = Group, y = Value, colour = Group)) + 
    geom_violin(aes(fill = Group)) + geom_boxplot(width = 0.1, fill = "white") + 
    geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") + 
    theme(legend.title = element_blank()) + ggtitle(label = "Pnliprp2") + xlab(NULL) + 
    ylab("Expression Level\nLog2(TPM+1)")